library(here)
library(tidyverse)
library(skimr)
library(magrittr)
library(Hmisc)
library(summarytools)
library(gmodels)camp_primary <- read_csv(here("homework1", "camp_primary.csv"), col_names = T)Parsed with column specification:
cols(
id = col_double(),
age_rz = col_double(),
POSFEV = col_double(),
visitc = col_double(),
fdays = col_double(),
gender = col_double(),
ethnic = col_double(),
trt = col_double(),
visit = col_double()
)
camp_primary %>%
skim()── Data Summary ────────────────────────
Values
Name Piped data
Number of rows 6584
Number of columns 9
_______________________
Column type frequency:
numeric 9
________________________
Group variables None
── Variable type: numeric ────────────────────────────────────────────────────────────────────
skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100
1 id 0 1 530. 302. 1 266 538 793 1041
2 age_rz 0 1 8.35 2.15 5 7 8 10 13
3 POSFEV 15 0.998 2.22 0.711 0.570 1.72 2.1 2.6 5.88
4 visitc 0 1 20.6 16.0 0 4 16 36 48
5 fdays 0 1 624. 483. -1 125 510 1092 1522
6 gender 0 1 0.407 0.491 0 0 0 1 1
7 ethnic 0 1 0.586 0.978 0 0 0 1 3
8 trt 0 1 0.909 0.830 0 0 1 2 2
9 visit 0 1 5.42 2.87 1 3 5 8 10
hist
1 ▇▇▇▇▇
2 ▆▇▃▆▂
3 ▃▇▂▁▁
4 ▇▅▅▂▅
5 ▇▅▅▃▃
6 ▇▁▁▁▆
7 ▇▂▁▁▁
8 ▇▁▆▁▆
9 ▇▇▇▇▇
POSFEV has 15 missing values.
## find those missing values
camp_primary %>%
filter(is.na(POSFEV))
Drop missing values
camp_primary %<>%
filter(!is.na(POSFEV))
camp_primary %>%
skim(POSFEV)── Data Summary ────────────────────────
Values
Name Piped data
Number of rows 6569
Number of columns 9
_______________________
Column type frequency:
numeric 1
________________________
Group variables None
── Variable type: numeric ────────────────────────────────────────────────────────────────────
skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
1 POSFEV 0 1 2.22 0.711 0.570 1.72 2.1 2.6 5.88 ▃▇▂▁▁
label(camp_primary$trt) <- "treatment group"
label(camp_primary$age_rz) <- "age at rz, years"
label(camp_primary$POSFEV) <- "post-bronchodilator FEV1 in liters"
label(camp_primary$visitc) <- "months since rz"
label(camp_primary$fdays) <- "days since rz"
label(camp_primary$gender) <- "gender"
label(camp_primary$ethnic) <- "ethnicity"camp_primary %<>%
mutate(gender = factor(gender, levels = c("0", "1"),
labels = c("male", "female"))) %>%
mutate(ethnic = factor(ethnic, levels = c("0", "1", "2", "3"),
labels = c("White", "Black", "Hispanic", "Other"))) %>%
mutate(trt = factor(trt, levels = c("0", "1", "2"),
labels = c("placebo", "budesonide", "nedocromil")))camp_primary <- camp_primary %>%
group_by(id) %>%
mutate(nobs=n()) %>%
ungroup()
label(camp_primary$nobs) <- "number of observations"camp_primary %>%
group_by(id) %>%
filter(visit == max(visit)) %>%
ungroup() %>%
summarise(obs = n(),
mean_obs = mean(nobs),
sd_obs = sd(nobs),
min_obs = min(nobs),
max_obs = max(nobs))Summary of the number of follow-up visits that were completed by the 695 children:
mean = 9.45 (sd=1.48), min=1, max=10, N=695
camp_primary_nobs <- camp_primary %>%
group_by(id) %>%
filter(visit == max(visit)) %>%
ungroup()
camp_primary_nobs %>%
summarytools::freq(nobs, order = "-freq", totals = FALSE)Frequencies
camp_primary_nobs$nobs
Label: number of observations
Freq % Valid % Valid Cum. % Total % Total Cum.
---------- ------ --------- -------------- --------- --------------
1 3 0.43 0.43 0.43 0.43
3 4 0.58 1.01 0.58 1.01
2 6 0.86 1.87 0.86 1.87
6 7 1.01 2.88 1.01 2.88
4 8 1.15 4.03 1.15 4.03
5 9 1.29 5.32 1.29 5.32
7 16 2.30 7.63 2.30 7.63
8 18 2.59 10.22 2.59 10.22
9 73 10.50 20.72 10.50 20.72
10 551 79.28 100.00 79.28 100.00
<NA> 0 0.00 100.00
Summarize the baseline characteristics (age, gender and ethnicity) of the children separately for each treatment group. Include the number of children receiving each treatment.
camp_primary_nobs %>%
group_by(trt) %>%
summarise(obs = n(),
mean_age = round(mean(age_rz), 2),
sd_age = round(sd(age_rz), 2),
min_age = min(age_rz),
max_age = max(age_rz)) %>%
ungroup()`summarise()` ungrouping output (override with `.groups` argument)
## gender
with(camp_primary_nobs, CrossTable(trt, gender,
digits = 2, prop.r = T, prop.c = T, prop.t = F,
prop.chisq = F, format = "SPSS"))
Cell Contents
|-------------------------|
| Count |
| Row Percent |
| Column Percent |
|-------------------------|
Total Observations in Table: 695
| gender
trt | male | female | Row Total |
-------------|-----------|-----------|-----------|
placebo | 146 | 129 | 275 |
| 53.09% | 46.91% | 39.57% |
| 35.44% | 45.58% | |
-------------|-----------|-----------|-----------|
budesonide | 126 | 84 | 210 |
| 60.00% | 40.00% | 30.22% |
| 30.58% | 29.68% | |
-------------|-----------|-----------|-----------|
nedocromil | 140 | 70 | 210 |
| 66.67% | 33.33% | 30.22% |
| 33.98% | 24.73% | |
-------------|-----------|-----------|-----------|
Column Total | 412 | 283 | 695 |
| 59.28% | 40.72% | |
-------------|-----------|-----------|-----------|
## ethnicity
with(camp_primary_nobs, CrossTable(trt, ethnic,
digits = 2, prop.r = T, prop.c = T, prop.t = F,
prop.chisq = F, format = "SPSS"))
Cell Contents
|-------------------------|
| Count |
| Row Percent |
| Column Percent |
|-------------------------|
Total Observations in Table: 695
| ethnic
trt | White | Black | Hispanic | Other | Row Total |
-------------|-----------|-----------|-----------|-----------|-----------|
placebo | 194 | 35 | 25 | 21 | 275 |
| 70.55% | 12.73% | 9.09% | 7.64% | 39.57% |
| 40.50% | 39.33% | 36.76% | 35.59% | |
-------------|-----------|-----------|-----------|-----------|-----------|
budesonide | 142 | 27 | 20 | 21 | 210 |
| 67.62% | 12.86% | 9.52% | 10.00% | 30.22% |
| 29.65% | 30.34% | 29.41% | 35.59% | |
-------------|-----------|-----------|-----------|-----------|-----------|
nedocromil | 143 | 27 | 23 | 17 | 210 |
| 68.10% | 12.86% | 10.95% | 8.10% | 30.22% |
| 29.85% | 30.34% | 33.82% | 28.81% | |
-------------|-----------|-----------|-----------|-----------|-----------|
Column Total | 479 | 89 | 68 | 59 | 695 |
| 68.92% | 12.81% | 9.78% | 8.49% | |
-------------|-----------|-----------|-----------|-----------|-----------|
Our analysis will focus on comparing pulmonary function over time, separately for each treatment group. In this section, you will explore the patterns of pulmonary function over time and determine a model to compare the trends in the mean pulmonary function over time across the three treatment groups.
# generate some summaries of POSFEV by visits for plotting
visitc.summaries <-
camp_primary %>%
group_by(visitc) %>%
summarise(avgfev = mean(POSFEV),
sdfev = sd(POSFEV),
medfev = median(POSFEV),
q75fev = quantile(POSFEV, 0.75),
q25fev = quantile(POSFEV, 0.25))`summarise()` ungrouping output (override with `.groups` argument)
print(visitc.summaries)# get the number of sample
n <- length(unique(camp_primary$id))
# Sample mean + 95% CI for the mean
visitc.summaries %>%
ggplot() +
geom_errorbar(aes(x = visitc, ymin = avgfev - 2*sdfev/sqrt(n),
ymax = avgfev + 2*sdfev/sqrt(n))) +
geom_point(aes(x = visitc, y = avgfev), color = "red") +
geom_line(aes(x = visitc, y = avgfev), color = "red") +
scale_x_continuous(breaks = c(0, 2, 3, 12, 16, 24, 28, 36, 40, 48)) +
ylim(1.5, 3) +
theme_classic() +
xlab("Months since randomization") +
ylab("post-\n bronchodilator \n FEV1, liters") +
ggtitle("Mean FEV1 changes over the 48 months of the primary CAMP study") +
labs(subtitle = "Sample mean + 95% CI for the population mean among 695 participants") +
theme(axis.title.y = element_text(angle = 0, vjust = 0.5, size = 10),
axis.title.x = element_text(size = 10))
Create a figure displaying the association between post-bronchodilator FEV1 and time separately in each treatment group. Allow the focus of your graph to describe how the mean FEV1 changes for each treatment group over the primary CAMP study period.
# generate some summaries of POSFEV grouped by visits and treatment groups for plotting
trt.summaries <-
camp_primary %>%
group_by(visitc, trt) %>%
summarise(avgfev = mean(POSFEV),
sdfev = sd(POSFEV),
medfev = median(POSFEV),
q75fev = quantile(POSFEV, 0.75),
q25fev = quantile(POSFEV, 0.25))`summarise()` regrouping output by 'visitc' (override with `.groups` argument)
print(trt.summaries)# Sample mean + 95% CI for the mean grouped by treamtments
trt.summaries %>%
ggplot() +
geom_errorbar(aes(x = visitc, ymin = avgfev - 2*sdfev/sqrt(n),
ymax = avgfev + 2*sdfev/sqrt(n),
color = trt),
width = 1,
position = position_dodge(width=0.9)) +
geom_point(aes(x = visitc, y = avgfev, color = trt),
position = position_dodge(width=0.9)) +
geom_line(aes(x = visitc, y = avgfev,
group = trt, color = trt),
position = position_dodge(width=0.9)) +
scale_x_continuous(breaks = c(0, 2, 3, 12, 16, 24, 28, 36, 40, 48)) +
ylim(1.5, 3) +
theme_classic() +
xlab("Months since randomization") +
ylab("post-\n bronchodilator \n FEV1, liters") +
ggtitle("Mean FEV1 changes over the 48 months of the primary CAMP study") +
labs(subtitle = "Sample mean + 95% CI grouped by 3 treatments for the\npopulation mean among 695 participants") +
theme(axis.title.y = element_text(angle = 0, vjust = 0.5, size = 10),
axis.title.x = element_text(size = 10)) +
theme(legend.position = c(0.9, 0.2)) +
labs(color="Treatment groups") + ## legend's title
scale_color_viridis_d() ## use color-blind friendly palettes
camp_primary %>%
ggplot() +
geom_line(aes(group = id, x = visitc, y = POSFEV), alpha = 0.3) +
scale_x_continuous(breaks = c(0, 2, 3, 12, 16, 24, 28, 36, 40, 48)) +
ylim(0.5, 6) +
theme_classic() +
xlab("Months since randomization") +
ylab("post-\n bronchodilator \n FEV1, liters") +
ggtitle("FEV1 changes over the 48 months of the primary CAMP study") +
labs(subtitle = "Spaghetti plot for 695 individual children") +
theme(axis.title.y = element_text(angle = 0, vjust = 0.5, size = 10),
axis.title.x = element_text(size = 10)) +
stat_summary(fun.y = mean, geom = "point", lwd = 2, colour = "red",
aes(x = visitc, y = POSFEV)) + ## add children average per visit
stat_smooth(aes(group = 1, x = visitc, y = POSFEV)) ## add children mean lowess line`fun.y` is deprecated. Use `fun` instead.
camp_primary %>%
ggplot() +
geom_line(aes(group = id, x = visitc, y = POSFEV,
color = trt),
alpha = 0.3) +
scale_x_continuous(breaks = c(0, 2, 3, 12, 16, 24, 28, 36, 40, 48)) +
ylim(0.5, 6) +
theme_classic() +
xlab("Months since randomization") +
ylab("post-\n bronchodilator \n FEV1, liters") +
ggtitle("FEV1 changes over the 48 months of the primary CAMP study") +
labs(subtitle = "Spaghetti plot for 695 individual children grouped by 3 treatments") +
theme(axis.title.y = element_text(angle = 0, vjust = 0.5, size = 10),
axis.title.x = element_text(size = 10)) +
stat_summary(fun.y = mean, geom = "point", lwd = 2, colour = "red",
aes(x = visitc, y = POSFEV)) + ## add children average per visit
stat_smooth(aes(group = 1, x = visitc, y = POSFEV)) + ## add children mean lowess line
facet_wrap("trt", strip.position = "top") +
theme(legend.position='none')`fun.y` is deprecated. Use `fun` instead.
Marginal model for the mean FEV1
\[
E(Y_{ij}|X_{ij}) = \beta_0 + \beta_1visitc_{ij} + \beta_2I(trt_i=budesonide) +
\beta_3I(trt_i=nedocromil) + \\
\beta_4visitc_{ij}*I(trt_i=budesonide) +
\beta_5visitc_{ij}*I(trt_i=nedocromil); \\
i = 1,2,3,...,695; j=1,2,3,...,10
\]
null and alternative hypothesis
\[
H_0: \beta_4=\beta_5=0 \\
H_a: \beta_4=\beta_5 \ne 0
\]
Often in randomized controlled trials with longitudinal designs (similar to the CAMP), researchers are not willing to assume a priori that the mean of the primary outcome will change over time according to a parametric model (e.g. linear, quadratic, etc.). Instead researchers fit a model that allows the mean of the primary outcome to be estimated separately at each assessment time; i.e. allowing time to be a factor, not a continuous exposure. Write out a model for the mean FEV1 as a function of time which treats time as a factor (i.e. estimates a separate mean FEV1 at each assessment time) and allows these means to vary across treatment group. Using the regression coefficients from your model, specify the null and alternative hypothesis for testing whether the mean changes over time in FEV1 differ across treatment group.
Model:
\(trt_{ij}\): treatment groups: placebo; budesonide; nedocromil
\(visitc_{ij}\): baseline and 48 months since randomization: 0, 2, 4, 12, 16, 24, 28, 36, 40, 48
Propose a model:
\[ E(Y_{ij}|X_{ij}) = \beta_0 + \beta_1I(visitc_{ij}=2 months) + \beta_2I(visitc_{ij}=4 months) +...+\beta_9I(visitc_{ij}=48 months) + \\ \beta_{10}I(trt_i=budesonide) + \beta_{11}I(trt_i=nedocromil) + \\ \beta_{12}I(visitc_{ij}=2 months)*I(trt_i=budesonide) +...+\beta_{20}I(visitc_{ij}=48 months)*I(trt_i=budesonide) +\\ \beta_{21}I(visitc_{ij}=2 months)*I(trt_i=nedocromil) +...+\beta_{29}I(visitc_{ij}=48 months)*I(trt_i=nedocromil) ;\\ i= 1, 2,..., 695; j=1,2,3,...,10 \]
null and alternative hypothesis:
\[
H_0: \beta_{12}=\beta_{13}=...=\beta_{29}=0 \\
H_a: \beta_{12}=\beta_{13}=...=\beta_{29}\ne 0
\]
Create a set of residuals to allow exploration of the variance and covariance in the CAMP data. Specifically, use a standard regression model (assuming independence of observations) to fit the mean model you proposed in Part I Section f. Use the fit of the model to compute and save the residuals.
mod1 <- lm(data = camp_primary, POSFEV ~ factor(visitc)*trt)
camp_primary$resid1 <- residuals(mod1)
head(camp_primary, 10)
# Reshape the data to wide format
camp.wide <-
camp_primary %>%
select(id, visitc, trt, resid1) %>%
pivot_wider(names_from = "visitc", ## the column name you want
names_prefix = "month", ## modifies your new column names
values_from = "resid1") ## the value col in the long data
head(camp.wide, 10)options(digits = 3)
camp.wide %>%
select(!c(id,trt)) %>%
cov(use = "na.or.complete") month0 month2 month4 month12 month16 month24 month28 month36 month40 month48
month0 0.249 0.238 0.241 0.268 0.271 0.295 0.304 0.323 0.329 0.342
month2 0.238 0.245 0.241 0.267 0.272 0.296 0.303 0.322 0.328 0.340
month4 0.241 0.241 0.254 0.272 0.279 0.300 0.310 0.329 0.335 0.349
month12 0.268 0.267 0.272 0.316 0.315 0.342 0.353 0.372 0.377 0.388
month16 0.271 0.272 0.279 0.315 0.340 0.356 0.367 0.390 0.393 0.403
month24 0.295 0.296 0.300 0.342 0.356 0.407 0.411 0.437 0.441 0.456
month28 0.304 0.303 0.310 0.353 0.367 0.411 0.440 0.456 0.463 0.478
month36 0.323 0.322 0.329 0.372 0.390 0.437 0.456 0.512 0.510 0.534
month40 0.329 0.328 0.335 0.377 0.393 0.441 0.463 0.510 0.538 0.556
month48 0.342 0.340 0.349 0.388 0.403 0.456 0.478 0.534 0.556 0.617
options(digits = 3)
camp.wide %>%
filter(trt == "placebo") %>%
select(!c(id,trt)) %>%
cov(use = "na.or.complete") month0 month2 month4 month12 month16 month24 month28 month36 month40 month48
month0 0.266 0.259 0.261 0.287 0.301 0.325 0.334 0.366 0.376 0.388
month2 0.259 0.267 0.262 0.290 0.305 0.332 0.338 0.369 0.381 0.389
month4 0.261 0.262 0.274 0.293 0.313 0.337 0.346 0.376 0.389 0.401
month12 0.287 0.290 0.293 0.339 0.349 0.380 0.387 0.423 0.432 0.440
month16 0.301 0.305 0.313 0.349 0.392 0.407 0.417 0.458 0.465 0.472
month24 0.325 0.332 0.337 0.380 0.407 0.457 0.462 0.501 0.512 0.521
month28 0.334 0.338 0.346 0.387 0.417 0.462 0.488 0.523 0.539 0.550
month36 0.366 0.369 0.376 0.423 0.458 0.501 0.523 0.596 0.603 0.624
month40 0.376 0.381 0.389 0.432 0.465 0.512 0.539 0.603 0.635 0.653
month48 0.388 0.389 0.401 0.440 0.472 0.521 0.550 0.624 0.653 0.709
options(digits = 3)
camp.wide %>%
filter(trt == "budesonide") %>%
select(!c(id,trt)) %>%
cov(use = "na.or.complete") month0 month2 month4 month12 month16 month24 month28 month36 month40 month48
month0 0.227 0.223 0.228 0.253 0.251 0.258 0.271 0.277 0.280 0.303
month2 0.223 0.234 0.233 0.257 0.257 0.265 0.274 0.282 0.282 0.306
month4 0.228 0.233 0.247 0.264 0.261 0.269 0.281 0.287 0.286 0.310
month12 0.253 0.257 0.264 0.306 0.298 0.304 0.321 0.323 0.325 0.347
month16 0.251 0.257 0.261 0.298 0.310 0.311 0.327 0.332 0.330 0.350
month24 0.258 0.265 0.269 0.304 0.311 0.337 0.346 0.358 0.358 0.387
month28 0.271 0.274 0.281 0.321 0.327 0.346 0.389 0.381 0.380 0.409
month36 0.277 0.282 0.287 0.323 0.332 0.358 0.381 0.419 0.405 0.442
month40 0.280 0.282 0.286 0.325 0.330 0.358 0.380 0.405 0.430 0.455
month48 0.303 0.306 0.310 0.347 0.350 0.387 0.409 0.442 0.455 0.535
options(digits = 3)
camp.wide %>%
filter(trt == "nedocromil") %>%
select(!c(id,trt)) %>%
cov(use = "na.or.complete") month0 month2 month4 month12 month16 month24 month28 month36 month40 month48
month0 0.253 0.230 0.231 0.261 0.257 0.297 0.304 0.319 0.322 0.326
month2 0.230 0.232 0.225 0.251 0.247 0.284 0.290 0.307 0.310 0.316
month4 0.231 0.225 0.238 0.256 0.255 0.286 0.297 0.315 0.320 0.327
month12 0.261 0.251 0.256 0.300 0.293 0.336 0.345 0.359 0.363 0.367
month16 0.257 0.247 0.255 0.293 0.306 0.341 0.348 0.366 0.368 0.373
month24 0.297 0.284 0.286 0.336 0.341 0.419 0.417 0.440 0.442 0.447
month28 0.304 0.290 0.297 0.345 0.348 0.417 0.434 0.450 0.457 0.462
month36 0.319 0.307 0.315 0.359 0.366 0.440 0.450 0.504 0.504 0.519
month40 0.322 0.310 0.320 0.363 0.368 0.442 0.457 0.504 0.531 0.540
month48 0.326 0.316 0.327 0.367 0.373 0.447 0.462 0.519 0.540 0.592
camp.wide %>%
select(!c(id,trt)) %>%
pairs
options(digits = 3)
# print fewer digits makes it easier to look at
# the correlation matix
camp.wide %>%
select(!c(id,trt)) %>%
cor(use = "na.or.complete") month0 month2 month4 month12 month16 month24 month28 month36 month40 month48
month0 1.000 0.964 0.958 0.955 0.933 0.928 0.919 0.905 0.898 0.871
month2 0.964 1.000 0.966 0.960 0.942 0.936 0.922 0.910 0.902 0.875
month4 0.958 0.966 1.000 0.960 0.948 0.932 0.928 0.913 0.906 0.881
month12 0.955 0.960 0.960 1.000 0.963 0.954 0.946 0.924 0.913 0.878
month16 0.933 0.942 0.948 0.963 1.000 0.958 0.950 0.936 0.918 0.880
month24 0.928 0.936 0.932 0.954 0.958 1.000 0.972 0.958 0.943 0.909
month28 0.919 0.922 0.928 0.946 0.950 0.972 1.000 0.961 0.952 0.918
month36 0.905 0.910 0.913 0.924 0.936 0.958 0.961 1.000 0.972 0.950
month40 0.898 0.902 0.906 0.913 0.918 0.943 0.952 0.972 1.000 0.964
month48 0.871 0.875 0.881 0.878 0.880 0.909 0.918 0.950 0.964 1.000
## Fit Linear Model Using Generalized Least Squares
library(nlme)
fit_vario <- gls(POSFEV ~ visitc + trt:visitc, data = camp_primary)
est_acf <- ACF(fit_vario, form=~1|id)
est_acfVariogram is appropriate for irregularly-spaced data or when time is measured on a continuous scale
## specify resType="response" to obtain the estimated variogram as defined in class
est_vario <- Variogram(fit_vario, form = ~visitc|id, resType="response")
sig2_hat <- var(resid(fit_vario))## set some defaults for plotting
textsize <- 1.5 ## plotting text size
chrsize <- 1 ## plotting character (point size) parameter
linesize <- 2 ## plotting line width parameter## plot variogram
par(mfrow=c(1,2),las=1)
scatter.smooth(est_vario$dist, est_vario$variog, ylim=c(0,0.5),
ylab="",xlab="Distance (months)", main=expression("Variogram: " ~ hat(gamma)(u[jk])),
pch=16, cex.main=textsize, cex.lab=textsize, cex.axis=chrsize, cex=1,
lwd=linesize, lpars=list(col='red',lty=1,lwd=linesize),xlim=c(0,50))
abline(h=sig2_hat, lty=2, col='grey',lwd=2)
legend("topleft","Loess smooth", col="red", lty=1,lwd=2,bty='n',cex=linesize)
scatter.smooth(est_vario$dist, 1-est_vario$variog/sig2_hat, pch=16,ylab="",xlab="Distance (Months)",
main=expression("Estimated Auto-correlation:" ~ hat(rho)(u[jk]) == 1-hat(gamma)(u[jk])/hat(sigma^2)(u[jk])),
cex.main=0.8, cex.lab=textsize, cex.axis=chrsize, cex=linesize/2,
lpars=list(col='red',lty=1,lwd=linesize),xlim=c(0,50))NA
NA
Not Toeplitz, exchangeable, banded k, or AR(1), so unstructured?
Fit the proposed model from PART 1.2.e with the parametric correlation model proposed in PART 2.
For each of the models, calculate the AIC.
## Fit the model using ML: unstructured variance/covariance
## treat time(visitc) as continuous
mod3.1 <- gls(data = camp_primary,
POSFEV ~ visitc+ trt + visitc:trt,
correlation = corSymm(form = ~ visit | id),
weights = varIdent(form = ~ 1 | visit))
summary(mod3.1)It takes too long to run the unsctructured model.
Refit the mean model assuming the independence and exchangeable correlation working models.
Compute the AIC for each of these two approaches.
## exchangeable correlation working models
library(doBy)
mod3.2 <- gls(data = camp_primary,
POSFEV ~ visitc+ trt + visitc:trt, ## visitc*trt
correlation = corCompSymm(form = ~ visitc | id))
summary(mod3.2)Generalized least squares fit by REML
Model: POSFEV ~ visitc + trt + visitc:trt
Data: camp_primary
Correlation Structure: Compound symmetry
Formula: ~visitc | id
Parameter estimate(s):
Rho
0.905
Coefficients:
Correlation:
(Intr) visitc trtbds trtndc vstc:trtb
visitc -0.133
trtbudesonide -0.658 0.087
trtnedocromil -0.658 0.087 0.433
visitc:trtbudesonide 0.088 -0.661 -0.132 -0.058
visitc:trtnedocromil 0.087 -0.656 -0.057 -0.132 0.434
Standardized residuals:
Min Q1 Med Q3 Max
-2.487 -0.721 -0.153 0.512 4.813
Residual standard error: 0.637
Degrees of freedom: 6569 total; 6563 residual
anova(mod3.2)Denom. DF: 6563
# esticon(mod3.2, L = m, level = 0.95, beta0 = c(0,0,0), joint.test = T)
## L, define matrix for our linear contrasts?## independence
mod3.2.2 <- gls(data = camp_primary,
POSFEV ~ visitc+ trt + visitc:trt,
correlation = NULL)
summary(mod3.2.2)Generalized least squares fit by REML
Model: POSFEV ~ visitc + trt + visitc:trt
Data: camp_primary
Coefficients:
Correlation:
(Intr) visitc trtbds trtndc vstc:trtb
visitc -0.790
trtbudesonide -0.659 0.521
trtnedocromil -0.662 0.523 0.436
visitc:trtbudesonide 0.521 -0.659 -0.791 -0.345
visitc:trtnedocromil 0.519 -0.657 -0.342 -0.789 0.433
Standardized residuals:
Min Q1 Med Q3 Max
-2.478 -0.715 -0.149 0.524 4.867
Residual standard error: 0.635
Degrees of freedom: 6569 total; 6563 residual
anova(mod3.2.2)Denom. DF: 6563
mod3.3 <- lme(data = camp_primary,
POSFEV ~ visitc+ trt + visitc:trt,
random = ~1|id, na.action=na.omit,
correlation = corExp(form = ~visitc|id))
summary(mod3.3)Linear mixed-effects model fit by REML
Data: camp_primary
Random effects:
Formula: ~1 | id
(Intercept) Residual
StdDev: 0.596 0.278
Correlation Structure: Exponential spatial correlation
Formula: ~visitc | id
Parameter estimate(s):
range
20.7
Fixed effects: POSFEV ~ visitc + trt + visitc:trt
Correlation:
(Intr) visitc trtbds trtndc vstc:trtb
visitc -0.281
trtbudesonide -0.658 0.185
trtnedocromil -0.658 0.185 0.433
visitc:trtbudesonide 0.185 -0.659 -0.281 -0.122
visitc:trtnedocromil 0.184 -0.655 -0.121 -0.280 0.432
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-4.1260 -0.4119 -0.0159 0.3804 3.9257
Number of Observations: 6569
Number of Groups: 695
anova(mod3.3)anova(mod3.2, mod3.2.2, mod3.3)Using the “best fitting” model, conduct the appropriate hypothesis test to determine if the changes in FEV1 over time are the same across the three treatment groups. HINT: use the test command.
Interpret the coefficients for “visitc” and “1.trt#c.visitc” and provide statistical support for whether there is statistical evidence of a benefit of receiving budesonide compared to placebo for promoting long term improved pulmonary function among children with asthma.
(coefficients interpretation and Wald test for interaction terms)
Note: exponential spatial correlation model for continuous time variable, AR(1) for discrete time variables
library(nlme)
## treat visitc as continuous, and assume no baseline difference for treatment groups
fit = lme(POSFEV~visitc+trt:visitc,
data=camp_primary,
random=~1|id,
na.action=na.omit,
correlation=corExp(form=~visitc|id))
summary(fit)Linear mixed-effects model fit by REML
Data: camp_primary
Random effects:
Formula: ~1 | id
(Intercept) Residual
StdDev: 0.596 0.278
Correlation Structure: Exponential spatial correlation
Formula: ~visitc | id
Parameter estimate(s):
range
20.7
Fixed effects: POSFEV ~ visitc + trt:visitc
Correlation:
(Intr) visitc vstc:trtb
visitc -0.181
visitc:trtbudesonide 0.000 -0.648
visitc:trtnedocromil -0.001 -0.644 0.432
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-4.1216 -0.4118 -0.0172 0.3803 3.9249
Number of Observations: 6569
Number of Groups: 695
## F test for interaction term
anova(fit)
# ## for your note: Wald test for two interaction terms:
#
# L = cbind(c(0,0,1,0),c(0,0,0,1))
#
# beta = fit$coefficients$fixed
#
# V = fit$varFix
#
# test.stat = t(beta) %*% L %*% solve(t(L) %*% V %*% L) %*% t(L) %*% beta
#
# test.stat
#
# 1-pchisq(test.stat,df=2)\[ E(Y_{ij}|X_{ij}) = \beta_0 + \beta_1visitc_{ij} + \beta_2visitc_{ij}*I(trt_i=budesonide) + \beta_3visitc_{ij}*I(trt_i=nedocromil); \\ i = 1,2,3,...,695; j=1,2,3,...,10 \] \(\beta_1\): the difference in mean FEV1 after randomization (\(visitc_{i(j+1)}\) vs. \(visitc_{ij}\)) in the placebo group.
\(\beta_2\): the difference between the budesonide treatment and placebo group in the mean FEV1 change after randomization (\(visitc_{i(j+1)}\) vs. \(visitc_{ij}\)).
\(\beta_3\): the difference between the nedocromil treatment and placebo group in the mean FEV1 change after randomization (\(visitc_{i(j+1)}\) vs. \(visitc_{ij}\)).
The F test for the Group * Time interaction terms is not statistically significant (\(F=0,\hspace{1.5mm} p>.05\)). It suggests there is no difference in the mean FEV1 change after randomization between both treatment groups and the placebo group. So it will not substantially harm the fit of the model if we drop the Group * Time interaction terms.
The AR(1) + random intercept model results also show there is no statistically significant difference between the nedocromil treatment and placebo group in the mean FEV1 change after randomization (\(\beta2=-0.0002,\hspace{1.5mm} p>.05\)).
Therefore, there is no statistical evidence of a benefit of receiving budesonide compared to placebo for promoting long term improved pulmonary function among children with asthma.
Question 2: Using the fit of the model, estimate the correlation between the post-bronchodilator FEV1 at baseline assessment (visitc = 0) and 12-months post randomization (i.e. visitc = 12).
## fit the random intercept + exponential model
fit_pt3_ri_exp <- lme(POSFEV~visitc + trt:visitc, data=camp_primary,
random=~1|id, correlation=corExp(form=~visitc|id), method="REML")
## calculate AIC
AIC(fit_pt3_ri_exp)[1] -2548
## can also find tau2, sigma2, and rho (calculate from range) from the summary output
summary(fit_pt3_ri_exp)Linear mixed-effects model fit by REML
Data: camp_primary
Random effects:
Formula: ~1 | id
(Intercept) Residual
StdDev: 0.596 0.278
Correlation Structure: Exponential spatial correlation
Formula: ~visitc | id
Parameter estimate(s):
range
20.7
Fixed effects: POSFEV ~ visitc + trt:visitc
Correlation:
(Intr) visitc vstc:trtb
visitc -0.181
visitc:trtbudesonide 0.000 -0.648
visitc:trtnedocromil -0.001 -0.644 0.432
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-4.1216 -0.4118 -0.0172 0.3803 3.9249
Number of Observations: 6569
Number of Groups: 695
## Part 5 Question 2: calculate model-estimated within subject correlation at 12 month lag
VarCorr(fit_pt3_ri_exp)id = pdLogChol(1)
Variance StdDev
(Intercept) 0.3552 0.596
Residual 0.0775 0.278
tau2 <- as.numeric(VarCorr(fit_pt3_ri_exp)[1,1])
tau2 ## variace of the random intercept[1] 0.355
sig2 <- as.numeric(VarCorr(fit_pt3_ri_exp)[2,1])
sig2[1] 0.0775
range <- coef(fit_pt3_ri_exp$modelStruct, unconstrained = FALSE)[2]
rangecorStruct.range
20.7
rho <- exp(-1/range)
rhocorStruct.range
0.953
(tau2 + (rho^(12))*sig2)/(tau2 + sig2)corStruct.range
0.921
\[ \tau^2 = 0.355 \\ \sigma^2=0.0771 \\ \rho=e^{-1/range}=0.953 \\ Corr(Y_{i0},Y_{i12})=\frac{\tau^2+\rho^{12}\sigma^2}{\tau^2+\sigma^2} = 0.921 \]
So, the correlation between the post-bronchodilator FEV1 at baseline assessment (visitc = 0) and 12-months post randomization is \(Corr(Y_{i0},Y_{i12})=0.921\).
Question 3: Describe an approach you could take to assess how well the model you fit describes the within subject correlation in the CAMP study.
EDA. Compare with the empirical correlation matrix using pairwise scatterplot?
Question 4: As mentioned in the introduction, the children were followed for 4 years in the primary study, after which a subset of children were followed for an additional 5 years in the continuation study. Use camp_continuation dataset, which includes the data from the primary and the continuation studies. Propose and fit a model to determine, for each treatment group separately, whether the monthly rate of change in FEV1 during the continuation study is the same as the monthly rate of change in FEV1 during the primary study. In your solution, i) provide your model for the mean including definitions for all variables and coefficients, ii) provide the three relevant hypothesis tests that you would conduct to answer the question, iii) using the same correlation model as above, fit the model and conduct the three hypothesis tests, iv) state your overall findings.
\[ E(Y_{ij}|X_{ij}) = \beta_0 + \beta_1visitc_{ij} + \beta_2visitc_{ij}*I(trt_i=budesonide) + \beta_3visitc_{ij}*I(trt_i=nedocromil) + \\ \beta_4(visitc_{ij}>52)*(visitc_{ij}-52) + \\ \beta_5(visitc_{ij}>52)*(visitc_{ij}-52)*I(trt_i=budesonide) + \\ \beta_6(visitc_{ij}>52)*(visitc_{ij}-52)*I(trt_i=nedocromil) \\ i = 1,2,3,...,695; j=1,2,3,...,16 \]
Definitions
\(trt_{ij}\): treatment groups: placebo; budesonide; nedocromil
\(visitc_{ij}\): 16 time responses across 108 months since randomization: 0, 2, 4, 12, 16, 24, 28, 36, 40, 48, 52, 60, 72, 84, 96, 108
\(visitc_{ij}>52\):
0, if follow-up months fewer than or equal 52 months;
1, if follow-up months greater than 52 months.
\(visitc_{ij}-52\): the numbers of months after 52 months.
\(\beta_1\): monthly rate of change in FEV1 in the placebo group before 52 months since randomization.
\(\beta_2\): the difference in the monthly rate of change in FEV1 between the budesonide treatment and placebo group before 52 months since randomization.
\(\beta_3\): the difference in the monthly rate of change in FEV1 between the nedocromil treatment and placebo group before 52 months since randomization.
\(\beta_4\): the difference in monthly rate of change in FEV1 in the placebo group after vs. before 52 months since randomization.
\(\beta_5\): the difference in the monthly rate of change in FEV1 between the budesonide treatment and placebo group after vs. before 52 months since randomization.
\(\beta_6\): the difference in the monthly rate of change in FEV1 between the nedocromil treatment and placebo group after vs. before 52 months since randomization.
\[ H_0 : \beta_4=\beta_5=\beta_6=0 \\ H_a : \beta_4=\beta_5=\beta_6 \ne 0 \\ \]
## read continuation file
camp_cont <- read_csv(here("homework1", "camp_continuation.csv"), col_names = T)Parsed with column specification:
cols(
id = col_double(),
age_rz = col_double(),
POSFEV = col_double(),
visitc = col_double(),
fdays = col_double(),
gender = col_double(),
ethnic = col_double(),
trt = col_double(),
visit = col_double()
)
## remove missingness
camp_cont %<>%
filter(!is.na(POSFEV))
## change trt to factor
camp_cont %<>%
mutate(trt = factor(trt, levels = c("0", "1", "2"),
labels = c("placebo", "budesonide", "nedocromil")))
## we have 16 visits
length(unique(camp_cont$visitc))[1] 16
## generate a spline term at 52 months
camp_cont %<>%
mutate(visitc_52 = visitc-52,
visitc_sp = (visitc_52>0)*visitc_52,
visitc_52_trt = visitc_52*(as.numeric(trt)-1),
visitc_sp_trt = visitc_sp*(as.numeric(trt)-1))## fit a exponential model with a random intercept and a spline term at 52 months
fit_sp <- lme(POSFEV ~ visitc + trt:visitc + visitc_sp + visitc_sp:trt,
data=camp_cont,
random=~1|id,
na.action=na.omit,
correlation=corExp(form=~visitc|id))
summary(fit_sp)Linear mixed-effects model fit by REML
Data: camp_cont
Random effects:
Formula: ~1 | id
(Intercept) Residual
StdDev: 0.000374 0.685
Correlation Structure: Exponential spatial correlation
Formula: ~visitc | id
Parameter estimate(s):
range
136
Fixed effects: POSFEV ~ visitc + trt:visitc + visitc_sp + visitc_sp:trt
Correlation:
(Intr) visitc vstc_s vstc:trtb vstc:trtn trtb:_
visitc -0.259
visitc_sp 0.054 -0.783
visitc:trtbudesonide 0.000 -0.635 0.523
visitc:trtnedocromil 0.000 -0.633 0.522 0.431
trtbudesonide:visitc_sp 0.000 0.506 -0.657 -0.796 -0.344
trtnedocromil:visitc_sp 0.000 0.503 -0.653 -0.343 -0.796 0.430
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.987 -0.723 -0.126 0.578 4.891
Number of Observations: 9671
Number of Groups: 695
anova(fit_sp)The F tests for the placebo and treatment spline terms are statistically significant (\(F_{placebo}=31,\hspace{1.5mm} p<.0001; \hspace{2mm} F_{trt}=4,\hspace{1.5mm} p<.0001\)). It suggests there is statistically significant difference in the monthly rate of change in FEV1 during the continuation study and the primary study in the treatment groups.
The AR(1) + random intercept model results also show there is statistically significant difference in the monthly rate of change in FEV1 in the placebo group after vs. before 52 months since randomization (\(\beta_3=-0.005,\hspace{1.5mm} p<.0001\)).
Besides, there is statistically significant difference in the monthly rate of change in FEV1 between the nedocromil treatment and placebo group after vs. before 52 months since randomization (\(\beta_6=0.004,\hspace{1.5mm} p<.005\)).
However, there is no statistically significant difference in the monthly rate of change in FEV1 between the budesonide treatment and placebo group after vs. before 52 months since randomization (\(\beta_5=0.001,\hspace{1.5mm} p>.05\)).
Therefore, the monthly rate of change in FEV1 during the continuation study is the same as the monthly rate of change in FEV1 during the primary study for the budesonide treatment group, but the nedocromil treatment and placebo group have different monthly rate of change in FEV1 during the continuation study comparing with the primary study.
# generate some summaries of POSFEV grouped by visits and treatment groups for plotting
trtcont.summaries <-
camp_cont %>%
group_by(visitc, trt) %>%
summarise(avgfev = mean(POSFEV),
sdfev = sd(POSFEV),
medfev = median(POSFEV),
q75fev = quantile(POSFEV, 0.75),
q25fev = quantile(POSFEV, 0.25))`summarise()` regrouping output by 'visitc' (override with `.groups` argument)
## ggplot
trtcont.summaries %>%
ggplot() +
geom_errorbar(aes(x = visitc, ymin = avgfev - 2*sdfev/sqrt(n),
ymax = avgfev + 2*sdfev/sqrt(n),
color = trt),
width = 1,
position = position_dodge(width=2.5)) +
geom_point(aes(x = visitc, y = avgfev, color = trt),
position = position_dodge(width=2.5)) +
geom_line(aes(x = visitc, y = avgfev,
group = trt, color = trt),
position = position_dodge(width=2.5)) +
scale_x_continuous(breaks = c(0, 2, 3, 12, 16, 24, 28, 36, 40, 48, 52, 60, 72, 84,96,108)) +
ylim(1.5, 4.1) +
theme_classic() +
xlab("Months since randomization") +
ylab("post-\n bronchodilator \n FEV1, liters") +
ggtitle("Mean FEV1 changes over the 108 months of the primary CAMP study") +
labs(subtitle = "Sample mean + 95% CI grouped by 3 treatments for the\npopulation mean among 695 participants") +
theme(axis.title.y = element_text(angle = 0, vjust = 0.5, size = 10),
axis.title.x = element_text(size = 10)) +
theme(legend.position = c(0.9, 0.2)) +
labs(color="Treatment groups") + ## legend's title
scale_color_viridis_d() ## use color-blind friendly palettesNA
NA